library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, last
library(ggplot2)
library(GGally)
## 
## Attaching package: 'GGally'
## 
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(tidyr)
"%&%" = function(a,b) paste(a,b,sep="")
my.dir = "/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/GEUVADIS/"

Pairwise comparison of BSLMM PVE

pops = scan(my.dir %&% 'poplist','c')
for(pop in pops){
    data <- fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_BSLMM-s100K_iterations_all_chr1-22_2016-05-11.txt")
    h2 = data.frame(dplyr::select(data,pve50)) #%>% mutate(local.h2=ifelse(is.na(local.h2),0,local.h2)) #set NAs to zero
    colnames(h2) = pop
    if(exists("allh2") == FALSE){
      allh2 = h2
    }else{
      allh2 <- cbind(allh2, h2)
    }
  }
print(ggpairs(allh2,lower=list(continuous="points",params=c(cex=0.7,ylim=c(0,1),xlim=c(0,1))),diag=list(continuous='blank'),title="BSLMM PVE estimates"))

rm("allh2")

PVE vs R2

pops = scan(my.dir %&% 'poplist','c')
 for(alpha in c(0,0.5,1)){ 
  for(pop in pops){
    h2 = fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_BSLMM-s100K_iterations_all_chr1-22_2016-05-11.txt") %>% mutate(ymin = pve025, ymax = pve975, ensid=gene) #set NAs to zero
    r2 =fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_10-foldCV_elasticNet_alpha" %&% alpha %&% "_1KGP_geuMAF0.01_all_chr1-22_2016-05-10.txt") %>% mutate(ensid=gene,R2=ifelse(is.na(R2),0,R2)) 
    all = inner_join(h2,r2,by='ensid') %>% arrange(pve50)
    p1<-ggplot(all,aes(x=1:nrow(all),y=pve50,ymin=ymin, ymax=ymax) ) + geom_pointrange(col='gray')+geom_point()
    print(p1 + geom_point(data=all,aes(x=1:nrow(all),y=R2),color='red',cex=0.8) + xlab(expression("Genes sorted by h"^2)) + ylab(expression(h^{2} ~"(black) or " ~ R^{2} ~ "(red)")) +theme_bw(20) + ggtitle(pop %&% " E-N (alpha=" %&% alpha %&% ")"))
    print(cor.test(all$pve50,all$R2))
  }
}

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 15.319, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08822366 0.11396354
## sample estimates:
##       cor 
## 0.1011105

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = -2.5609, df = 22719, p-value = 0.01045
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.029983682 -0.003985485
## sample estimates:
##         cor 
## -0.01698745

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 2.7723, df = 22719, p-value = 0.005571
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.005387977 0.031384885
## sample estimates:
##        cor 
## 0.01838954

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 2.3307, df = 22719, p-value = 0.01978
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.002458516 0.028458001
## sample estimates:
##        cor 
## 0.01546087

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 2.7617, df = 22719, p-value = 0.005755
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.005317684 0.031314659
## sample estimates:
##        cor 
## 0.01831927

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = -1.0851, df = 22719, p-value = 0.2779
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.020200030  0.005804323
## sample estimates:
##         cor 
## -0.00719907

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 16.531, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09615216 0.12184883
## sample estimates:
##       cor 
## 0.1090187

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 3.7028, df = 22719, p-value = 0.0002137
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.01155950 0.03754952
## sample estimates:
##        cor 
## 0.02455866

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 7.9908, df = 22719, p-value = 1.332e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03996480 0.06589763
## sample estimates:
##        cor 
## 0.05294014

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 7.5049, df = 22719, p-value = 6.373e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03675031 0.06269171
## sample estimates:
##        cor 
## 0.04972939

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 7.9258, df = 22719, p-value = 2.22e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03953506 0.06546906
## sample estimates:
##        cor 
## 0.05251091

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 7.4762, df = 22719, p-value = 7.927e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.03656050 0.06250239
## sample estimates:
##       cor 
## 0.0495398

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 16.524, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09611019 0.12180710
## sample estimates:
##       cor 
## 0.1089768

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 5.2693, df = 22719, p-value = 1.382e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02194473 0.04791869
## sample estimates:
##        cor 
## 0.03493761

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 8.353, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04236012 0.06828622
## sample estimates:
##       cor 
## 0.0553325

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 8.8247, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04547880 0.07139567
## sample estimates:
##        cor 
## 0.05844708

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 9.187, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04787299 0.07378245
## sample estimates:
##        cor 
## 0.06083797

## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$R2
## t = 8.2566, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04172321 0.06765112
## sample estimates:
##        cor 
## 0.05469638

PVE vs gcta h2

pops = scan(my.dir %&% 'poplist','c')
for(pop in pops){
  pve = fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_BSLMM-s100K_iterations_all_chr1-22_2016-05-11.txt") %>% mutate(ymin = pve025, ymax = pve975, ensid=gene) #set NAs to zero
  h2 = fread(my.dir %&% "GEUVADIS_" %&% pop %&% "_exp_gcta_reml_local_h2_1KGP_geuMAF0.01_all_chr1-22_2016-05-02.txt") %>% mutate(ymin = pmax(0, local.h2 - 2 * local.se), ymax = pmin(1, local.h2 + 2 * local.se), local.h2=ifelse(is.na(local.h2),0,local.h2)) #set NAs to zero
  all = inner_join(h2,pve,by='ensid') %>% arrange(pve50) %>% mutate(population=pop)
  p1<-ggplot(all,aes(x=local.h2,y=pve50)) + geom_point()
  print(pop)
  print(cor.test(all$pve50,all$local.h2))
  if(exists("allh2pve") == FALSE){
    allh2pve <- all
  }else{
    allh2pve <- rbind(allh2pve, all)
  }
}
## [1] "ALL"
## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$local.h2
## t = 65.965, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3899563 0.4117824
## sample estimates:
##       cor 
## 0.4009263 
## 
## [1] "CEU"
## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$local.h2
## t = 18.493, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1089499 0.1345700
## sample estimates:
##       cor 
## 0.1217802 
## 
## [1] "FIN"
## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$local.h2
## t = 18.87, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1114013 0.1370057
## sample estimates:
##       cor 
## 0.1242242 
## 
## [1] "GBR"
## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$local.h2
## t = 20.063, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1191436 0.1446967
## sample estimates:
##       cor 
## 0.1319421 
## 
## [1] "TSI"
## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$local.h2
## t = 23.339, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1402961 0.1656930
## sample estimates:
##       cor 
## 0.1530198 
## 
## [1] "YRI"
## 
##  Pearson's product-moment correlation
## 
## data:  all$pve50 and all$local.h2
## t = 17.183, df = 22719, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1004118 0.1260839
## sample estimates:
##       cor 
## 0.1132667
allh2pve = mutate(allh2pve,LCS=factor(pge025>0.01,labels=c('\u2264 0.01','> 0.01')))

ggplot(allh2pve,aes(x=local.h2,y=pve50)) + geom_point(alpha=0.2) + facet_wrap(~population) + theme_bw(14) + xlab("GCTA h2") + ylab("BSLMM h2")

ggplot(allh2pve,aes(x=pve50,y=pge50,ymin=pge025,ymax=pge975,col=LCS)) + geom_pointrange(col='gray') + geom_point() + facet_wrap(~population) + theme_bw(14) + xlab("BSLMM PVE") + ylab("BSLMM PGE") + coord_cartesian(xlim=c(-0.1,1.1))